{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from functools import reduce"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "## Look-ahead-bias free data\n",
    "forecast_woLAB = pd.read_parquet('../data/Results/RF_wo_lookahead_raw_005.parquet')\n",
    "forecast_woLAB = forecast_woLAB[['permno','YearMonth',\n",
    "                                'RF_q1','RF_q2','RF_q3',\n",
    "                                'RF_y1','RF_y2'\n",
    "                    ]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "## Alternative Machine Learning Models\n",
    "f_abbr_list = [('OLS_pred','OLS'),\n",
    "               ('PLS_pred','PLS'),\n",
    "               ('LASSO_pred','LASSO'),\n",
    "               ('ENet_pred','ENet'),\n",
    "               ('RF_pred','RF'),\n",
    "               ('LGBM_pred','LGBM'),\n",
    "              ]\n",
    "abbr_list = list(map(lambda x: x[1], f_abbr_list))\n",
    "\n",
    "forecast_all = []\n",
    "for f,abbr in f_abbr_list:\n",
    "    RF = pd.read_parquet(f'../data/Results/ML_variants/{f}.parquet')\n",
    "    RF = RF[['permno','YearMonth',f'{abbr}_EPS_Q1',f'{abbr}_EPS_Q2',f'{abbr}_EPS_Q3',\n",
    "             f'{abbr}_EPS_Y1',f'{abbr}_EPS_Y2',]].set_index(['permno','YearMonth'])\n",
    "    forecast_all.append(RF)\n",
    "forecast_all = reduce(lambda x,y: pd.merge(x,y,on=['permno','YearMonth'],how='outer'),\n",
    "                      forecast_all)\n",
    "forecast_all.reset_index(inplace=True)\n",
    "\n",
    "## Composite\n",
    "for i in ['Q1','Q2','Q3','Y1','Y2']:\n",
    "    forecast_all[f'Composite_EPS_{i}'] = forecast_all[[f'OLS_EPS_{i}',f'PLS_EPS_{i}',f'LASSO_EPS_{i}',\n",
    "                                                   f'ENet_EPS_{i}',f'RF_EPS_{i}',f'LGBM_EPS_{i}']].mean(axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "forecast_all = forecast_woLAB.merge(forecast_all, how='left', on=['permno','YearMonth'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.microsoft.datawrangler.viewer.v0+json": {
       "columns": [
        {
         "name": "index",
         "rawType": "int64",
         "type": "integer"
        },
        {
         "name": "permno",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "YearMonth",
         "rawType": "datetime64[ns]",
         "type": "datetime"
        },
        {
         "name": "RF_q1",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "RF_q2",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "RF_q3",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "RF_y1",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "RF_y2",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "OLS_EPS_Q1",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "OLS_EPS_Q2",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "OLS_EPS_Q3",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "OLS_EPS_Y1",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "OLS_EPS_Y2",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "PLS_EPS_Q1",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "PLS_EPS_Q2",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "PLS_EPS_Q3",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "PLS_EPS_Y1",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "PLS_EPS_Y2",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "LASSO_EPS_Q1",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "LASSO_EPS_Q2",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "LASSO_EPS_Q3",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "LASSO_EPS_Y1",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "LASSO_EPS_Y2",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "ENet_EPS_Q1",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "ENet_EPS_Q2",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "ENet_EPS_Q3",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "ENet_EPS_Y1",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "ENet_EPS_Y2",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "RF_EPS_Q1",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "RF_EPS_Q2",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "RF_EPS_Q3",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "RF_EPS_Y1",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "RF_EPS_Y2",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "LGBM_EPS_Q1",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "LGBM_EPS_Q2",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "LGBM_EPS_Q3",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "LGBM_EPS_Y1",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "LGBM_EPS_Y2",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "Composite_EPS_Q1",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "Composite_EPS_Q2",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "Composite_EPS_Q3",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "Composite_EPS_Y1",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "Composite_EPS_Y2",
         "rawType": "float64",
         "type": "float"
        }
       ],
       "conversionMethod": "pd.DataFrame",
       "ref": "f756ced0-5df4-4bd2-898e-0943a27ae496",
       "rows": [
        [
         "0",
         "10015.0",
         "1986-01-31 00:00:00",
         null,
         null,
         null,
         "0.49843865435349927",
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null
        ],
        [
         "1",
         "10057.0",
         "1986-01-31 00:00:00",
         null,
         "0.053901564753037805",
         "0.09437144688481647",
         "0.634142368504043",
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null
        ],
        [
         "2",
         "10137.0",
         "1986-01-31 00:00:00",
         null,
         "0.9968590216307059",
         "0.7172978064739818",
         null,
         "3.8033429767315243",
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null
        ],
        [
         "3",
         "10145.0",
         "1986-01-31 00:00:00",
         "0.4620890643941777",
         "0.771296146156635",
         "0.8690427189988639",
         "3.9403022183281147",
         "3.807993268870117",
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null
        ],
        [
         "4",
         "10153.0",
         "1986-01-31 00:00:00",
         null,
         "-0.8546745427663563",
         "-0.7169338763087258",
         null,
         "-1.3369523287085563",
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null,
         null
        ]
       ],
       "shape": {
        "columns": 42,
        "rows": 5
       }
      },
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>permno</th>\n",
       "      <th>YearMonth</th>\n",
       "      <th>RF_q1</th>\n",
       "      <th>RF_q2</th>\n",
       "      <th>RF_q3</th>\n",
       "      <th>RF_y1</th>\n",
       "      <th>RF_y2</th>\n",
       "      <th>OLS_EPS_Q1</th>\n",
       "      <th>OLS_EPS_Q2</th>\n",
       "      <th>OLS_EPS_Q3</th>\n",
       "      <th>...</th>\n",
       "      <th>LGBM_EPS_Q1</th>\n",
       "      <th>LGBM_EPS_Q2</th>\n",
       "      <th>LGBM_EPS_Q3</th>\n",
       "      <th>LGBM_EPS_Y1</th>\n",
       "      <th>LGBM_EPS_Y2</th>\n",
       "      <th>Composite_EPS_Q1</th>\n",
       "      <th>Composite_EPS_Q2</th>\n",
       "      <th>Composite_EPS_Q3</th>\n",
       "      <th>Composite_EPS_Y1</th>\n",
       "      <th>Composite_EPS_Y2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10015.0</td>\n",
       "      <td>1986-01-31</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.498439</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10057.0</td>\n",
       "      <td>1986-01-31</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.053902</td>\n",
       "      <td>0.094371</td>\n",
       "      <td>0.634142</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10137.0</td>\n",
       "      <td>1986-01-31</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.996859</td>\n",
       "      <td>0.717298</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.803343</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10145.0</td>\n",
       "      <td>1986-01-31</td>\n",
       "      <td>0.462089</td>\n",
       "      <td>0.771296</td>\n",
       "      <td>0.869043</td>\n",
       "      <td>3.940302</td>\n",
       "      <td>3.807993</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10153.0</td>\n",
       "      <td>1986-01-31</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-0.854675</td>\n",
       "      <td>-0.716934</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-1.336952</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 42 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    permno  YearMonth     RF_q1     RF_q2     RF_q3     RF_y1     RF_y2  \\\n",
       "0  10015.0 1986-01-31       NaN       NaN       NaN  0.498439       NaN   \n",
       "1  10057.0 1986-01-31       NaN  0.053902  0.094371  0.634142       NaN   \n",
       "2  10137.0 1986-01-31       NaN  0.996859  0.717298       NaN  3.803343   \n",
       "3  10145.0 1986-01-31  0.462089  0.771296  0.869043  3.940302  3.807993   \n",
       "4  10153.0 1986-01-31       NaN -0.854675 -0.716934       NaN -1.336952   \n",
       "\n",
       "   OLS_EPS_Q1  OLS_EPS_Q2  OLS_EPS_Q3  ...  LGBM_EPS_Q1  LGBM_EPS_Q2  \\\n",
       "0         NaN         NaN         NaN  ...          NaN          NaN   \n",
       "1         NaN         NaN         NaN  ...          NaN          NaN   \n",
       "2         NaN         NaN         NaN  ...          NaN          NaN   \n",
       "3         NaN         NaN         NaN  ...          NaN          NaN   \n",
       "4         NaN         NaN         NaN  ...          NaN          NaN   \n",
       "\n",
       "   LGBM_EPS_Q3  LGBM_EPS_Y1  LGBM_EPS_Y2  Composite_EPS_Q1  Composite_EPS_Q2  \\\n",
       "0          NaN          NaN          NaN               NaN               NaN   \n",
       "1          NaN          NaN          NaN               NaN               NaN   \n",
       "2          NaN          NaN          NaN               NaN               NaN   \n",
       "3          NaN          NaN          NaN               NaN               NaN   \n",
       "4          NaN          NaN          NaN               NaN               NaN   \n",
       "\n",
       "   Composite_EPS_Q3  Composite_EPS_Y1  Composite_EPS_Y2  \n",
       "0               NaN               NaN               NaN  \n",
       "1               NaN               NaN               NaN  \n",
       "2               NaN               NaN               NaN  \n",
       "3               NaN               NaN               NaN  \n",
       "4               NaN               NaN               NaN  \n",
       "\n",
       "[5 rows x 42 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forecast_all.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "forecast_all.to_csv('../data/Results/Look_Ahead_Bias_Free_Earnings_Forecast.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
